EEA AQ Stations

Author

Johannes Heisig

Published

October 5, 2023

library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyr)
library(stars)
Loading required package: abind
Loading required package: sf
Linking to GEOS 3.10.2, GDAL 3.4.3, PROJ 8.2.1; sf_use_s2() is TRUE
library(terra)
terra 1.7.46

Attaching package: 'terra'
The following object is masked from 'package:tidyr':

    extract

Stations

Create a table with rows for each station and pollutant recorded. Stations have a type (background, industrial, traffic), an area label (rural, urban, suburban), and coordinates in lon/lat.

station_meta = read.table("PanEuropean_metadata.csv", sep = "\t", header = T) |> 
  dplyr::select(AirQualityStationEoICode, Countrycode, Longitude, Latitude,
                StationType = AirQualityStationType, 
                StationArea = AirQualityStationArea,
                AirPollutant = AirPollutantCode) |> 
  dplyr::mutate(AirPollutant = factor(basename(AirPollutant),
                                      levels = c(6001, 5, 8, 7),
                                      labels = c("PM2.5", "PM10", "NO2", "O3"))) |> 
  dplyr::filter(!is.na(AirPollutant)) |> 
  unique() |> 
  dplyr::mutate(AirQualityStationEoICode = as.factor(AirQualityStationEoICode),
                Countrycode = as.factor(Countrycode),
                StationType = as.factor(StationType),
                StationArea = as.factor(StationArea)) |> 
  dplyr::group_by(AirQualityStationEoICode, Countrycode, 
                  StationType, StationArea, AirPollutant) |>
  dplyr::summarise(Longitude = mean(Longitude), Latitude = mean(Latitude), .groups = 'drop') |> 
  dplyr::group_by(AirQualityStationEoICode, StationArea, AirPollutant) |> 
  # if two types registered for 1 station, label them as non-background (n=45).
  dplyr::filter(!(dplyr::n() > 1 & !StationType == "background")) |> 
  dplyr::ungroup()

station_laea = 
  select(station_meta, AirQualityStationEoICode, Longitude, Latitude) |> 
  unique() |> 
  st_as_sf(coords = c("Longitude","Latitude"), crs = 4326) |> 
  st_transform(st_crs(3035))

Supplementary Data

Elevation

if (!file.exists("EEA_stations_elevation.parquet")){
  dem = stars::read_stars("/vsicurl/https://s3.eu-central-1.wasabisys.com/eumap/dtm/dtm_elev.lowestmode_gedi.eml_mf_30m_0..0cm_2000..2018_eumap_epsg3035_v0.3.tif")
  
  Sys.setenv(GDAL_CACHEMAX=100,
             GDAL_NUM_THREADS=4,
             GDAL_HTTP_MULTIRANGE = 'YES',
             GDAL_HTTP_MERGE_CONSECUTIVE_RANGES = 'YES',
             GDAL_DISABLE_READDIR_ON_OPEN = 'EMPTY_DIR',
             VSI_CACHE = TRUE,
             CPL_VSIL_CURL_ALLOWED_EXTENSIONS = '.tif',
             GDAL_HTTP_CONNECTTIMEOUT = 36000,
             GDAL_HTTP_TIMEOUT = 36000,
             CPL_VSIL_CURL_USE_HEAD = 'NO',
             CPL_CURL_GZIP = 'NO')
  
  tictoc::tic()
  station_laea$Elevation = stars::st_extract(dem, station_laea) |> pull(1)
  arrow::write_parquet(st_drop_geometry(station_laea), "EEA_stations_elevation.parquet")
  tictoc::toc()
  
} else {
  station_elevation = arrow::read_parquet("EEA_stations_elevation.parquet") |> 
    mutate(Elevation = ifelse(Elevation < -100, NA, Elevation))
}

(station_laea = left_join(station_laea, station_elevation))
Joining with `by = join_by(AirQualityStationEoICode)`
Warning in sf_column %in% names(g): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1608 of `x` matches multiple rows in `y`.
ℹ Row 1608 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
Simple feature collection with 6164 features and 2 fields
Geometry type: POINT
Dimension:     XY
Bounding box:  xmin: -2677029 ymin: -3074758 xmax: 10002190 ymax: 6182022
Projected CRS: ETRS89-extended / LAEA Europe
# A tibble: 6,164 × 3
   AirQualityStationEoICode          geometry Elevation
   <fct>                          <POINT [m]>     <dbl>
 1 4101422                  (5959964 2181947)        NA
 2 4101522                  (5959045 2184719)        NA
 3 AD0942A                  (3625031 2195164)     10444
 4 AD0944A                  (3627252 2195722)     16231
 5 AD0945A                  (3639878 2196313)     24894
 6 AL0203A                  (5233926 2012682)      8455
 7 AL0204A                  (5127863 1973522)      2143
 8 AL0205A                  (5113104 2073920)        46
 9 AL0206A                  (5106425 2184055)      7169
10 AL0207A                  (5168620 2057815)      1272
# ℹ 6,154 more rows

Corine Land Cover

clc_8class = "supplementary/CLC_reclass_8.tif"
if (!file.exists(clc_8class)){
  lc = rast("supplementary/lcv_landcover.clc_corine_c_100m_0..0cm_2018_eumap_epsg3035_v2020.tif")
  NAflag(lc) = 128
  summary(lc)
  rcl = cbind(c(1:48),
              c(1,2,3, rep(4,3), rep(3,3), rep(5,2), rep(6, 11), rep(7,12), rep(8,14)))
  lc8 = classify(lc, rcl, filename = clc_8class,
                 datatype = "INT1U", NAflag = 0, overwrite=T,
                 gdal=c("COMPRESS=DEFLATE"))
} else {
  lc8 = rast("supplementary/CLC_reclass_8.tif")
}
NAflag(lc8) = 128
levels(lc8) =  data.frame(id = c(1:8),
                          CLC8 = c("HDR", "LDR", "IND","TRAF","UGR","AGR","NAT","OTH"))
plot(lc8)

station_laea$CLC8 = extract(lc8, vect(station_laea)) |> pull(2)

Population Density

pop = rast("supplementary/JRC_GRID_2018/JRC_1K_POP_2018.tif")
Warning in new_CppObject_xp(fields$.module, fields$.pointer, ...): GDAL
Message 1: The definition of projected CRS EPSG:3035 got from GeoTIFF keys is
not the same as the one from the EPSG registry, which may cause issues during
reprojection operations. Set GTIFF_SRS_SOURCE configuration option to EPSG to
use official parameters (overriding the ones from GeoTIFF keys), or to GEOKEYS
to use custom values from GeoTIFF keys and drop the EPSG code.
plot(pop, breaks = c(0,5,10,50,100,500,1000))

station_laea$Population = extract(pop, vect(station_laea)) |> pull(2) |> replace_na(0)

Result

(station_meta = left_join(station_meta, station_laea))
Joining with `by = join_by(AirQualityStationEoICode)`
Warning in left_join(station_meta, station_laea): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 3767 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
# A tibble: 15,881 × 11
   AirQualityStationEoICode Countrycode StationType StationArea AirPollutant
   <fct>                    <fct>       <fct>       <fct>       <fct>       
 1 4101422                  TR          industrial  suburban    PM10        
 2 4101422                  TR          industrial  suburban    NO2         
 3 4101522                  TR          industrial  suburban    PM10        
 4 4101522                  TR          industrial  suburban    NO2         
 5 AD0942A                  AD          background  urban       PM2.5       
 6 AD0942A                  AD          background  urban       PM10        
 7 AD0942A                  AD          background  urban       NO2         
 8 AD0942A                  AD          background  urban       O3          
 9 AD0944A                  AD          background  rural       O3          
10 AD0945A                  AD          background  rural       O3          
# ℹ 15,871 more rows
# ℹ 6 more variables: Longitude <dbl>, Latitude <dbl>, geometry <POINT [m]>,
#   Elevation <dbl>, CLC8 <fct>, Population <int>
summary(station_meta)
 AirQualityStationEoICode  Countrycode       StationType  
 IT1170A:   24            IT     :2213   background:9952  
 IT2129A:   16            ES     :2054   industrial:2025  
 TR0066A:   16            DE     :1966   traffic   :3904  
 SE0093A:   12            FR     :1885                    
 IT1090A:    9            TR     :1253                    
 E135019:    8            PL     :1024                    
 (Other):15796            (Other):5486                    
         StationArea   AirPollutant   Longitude          Latitude     
 rural         :1817   PM2.5:2862   Min.   :-63.081   Min.   :-21.34  
 rural-nearcity: 253   PM10 :4725   1st Qu.:  3.363   1st Qu.: 41.72  
 rural-regional: 451   NO2  :5123   Median : 10.791   Median : 46.65  
 rural-remote  : 124   O3   :3171   Mean   : 10.811   Mean   : 46.34  
 suburban      :3602                3rd Qu.: 17.961   3rd Qu.: 50.92  
 urban         :9634                Max.   : 55.628   Max.   : 78.91  
                                                                      
          geometry       Elevation          CLC8        Population   
 POINT        :15881   Min.   :  -52   LDR    :7478   Min.   :    0  
 epsg:3035    :    0   1st Qu.:  328   HDR    :3002   1st Qu.:  172  
 +proj=laea...:    0   Median : 1152   AGR    :2032   Median : 2448  
                       Mean   : 2036   IND    :1357   Mean   : 4132  
                       3rd Qu.: 2737   NAT    : 941   3rd Qu.: 5910  
                       Max.   :30185   (Other): 883   Max.   :51127  
                       NA's   :1420    NA's   : 188                  
table(station_meta$AirPollutant, station_meta$StationType)
       
        background industrial traffic
  PM2.5       1851        323     688
  PM10        2830        672    1223
  NO2         2740        661    1722
  O3          2531        369     271
table(station_meta$AirPollutant, station_meta$StationArea)
       
        rural rural-nearcity rural-regional rural-remote suburban urban
  PM2.5   274             46             83           21      617  1821
  PM10    477             77            104           25     1086  2956
  NO2     551             71            113           30     1095  3263
  O3      515             59            151           48      804  1594
table(station_meta$StationArea, station_meta$StationType)
                
                 background industrial traffic
  rural                1434        348      35
  rural-nearcity        201         38      14
  rural-regional        440          7       4
  rural-remote          124          0       0
  suburban             2307        866     429
  urban                5446        766    3422
station_meta_sf = sf::st_as_sf(station_meta, coords = c("Longitude","Latitude"), crs = 4326)

filter(station_meta_sf, 
       StationType == "background",
       AirPollutant == "NO2") |> 
  mapview::mapview(zcol="StationArea")
filter(station_meta_sf, is.na(CLC8)) |> 
  mapview::mapview(zcol="StationArea")
filter(station_meta_sf, is.na(Elevation)) |> 
  mapview::mapview(zcol="Countrycode")
station_final = filter(station_meta_sf,
                       !is.na(CLC8),
                       !is.na(Elevation))

mapview::mapview(station_final, zcol="CLC8")
summary(station_final)
 AirQualityStationEoICode  Countrycode       StationType  
 IT1170A:   24            IT     :2213   background:8844  
 IT2129A:   16            ES     :2054   industrial:1916  
 SE0093A:   12            DE     :1966   traffic   :3701  
 IT1090A:    9            FR     :1754                    
 E135019:    8            PL     :1024                    
 IT0854A:    8            RO     : 571                    
 (Other):14384            (Other):4879                    
         StationArea   AirPollutant          geometry       Elevation    
 rural         :1709   PM2.5:2555   POINT        :14461   Min.   :  -52  
 rural-nearcity: 248   PM10 :4331   epsg:4326    :    0   1st Qu.:  328  
 rural-regional: 446   NO2  :4733   +proj=long...:    0   Median : 1152  
 rural-remote  : 123   O3   :2842                         Mean   : 2036  
 suburban      :3498                                      3rd Qu.: 2737  
 urban         :8437                                      Max.   :30185  
                                                                         
      CLC8        Population   
 LDR    :7031   Min.   :    0  
 HDR    :2535   1st Qu.:  600  
 AGR    :1967   Median : 2948  
 IND    :1203   Mean   : 4538  
 NAT    : 910   3rd Qu.: 6381  
 UGR    : 464   Max.   :51127  
 (Other): 351                  

Write files

bind_cols(st_drop_geometry(station_final), 
      data.frame(st_coordinates(station_final)) |> 
        setNames(c("Longitude","Latitude"))) |> 
  arrow::write_parquet("EEA_stations_meta.parquet")

geoarrow::write_geoparquet(station_final, "EEA_stations_meta_sf.parquet")